library(scran)
library(scater)
library(HDF5Array)
library(ggplot2)
library(Matrix)
library(pheatmap)
library(RColorBrewer)
library(uwot)
library(grid)
library(gridExtra)
library(GEDI)
dir_data<- "../data_objects/"
dir_data_hdf5<- paste0(dir_data, "COVID19_SCE/")
#' Plot 2D embedding
#'
#' Plot a 2D representation (embedding) of cells
#' @param embedding_mat Embedding
#' @param colour vector of variable to plot
#' @param randomize Logical. Whether to randomize data before plotting.
#'
#' @return ggplot2 object
#' @export
#'
plot_embedding <- function(embedding_mat,colour,randomize=T, size_point=0.05) {
# create a data frame that will have the embedding as well as the colors
embedding_obj <- data.frame(
Dim1=embedding_mat[,1],
Dim2=embedding_mat[,2],
Var=colour )
# randomize the order of the objects
if( randomize ) {
embedding_obj <- embedding_obj[ sample.int(nrow(embedding_obj)), ]
}
# create the plots
if(is.numeric(colour)) # the color variable is numeric
{
#embedding_obj$Var <- embedding_obj$Var - mean(embedding_obj$Var)
lim <- stats::quantile(abs(embedding_obj$Var),0.99)
ggplot2::ggplot(
embedding_obj, ggplot2::aes_string( x="Dim1", y="Dim2", colour="Var"))+
ggplot2::geom_point(size=size_point)+
ggplot2::theme_minimal()+
ggplot2::scale_color_gradientn( limits=c(-lim,lim), colours=c("blue","light grey","red"), oob=scales::squish )
} else {
ggplot2::ggplot(
embedding_obj, ggplot2::aes_string( x="Dim1", y="Dim2", colour="Var"))+
ggplot2::geom_point(size=size_point)+
ggplot2::theme_minimal()+
ggplot2::guides(colour=ggplot2::guide_legend(override.aes=list(size=3)))
}
}
summary_markers<- function(markers_sce){
lis_markers<- lapply(names(markers_sce), function(cluster_look) {
temp_df<- markers_sce[[cluster_look]]
temp_df$Gene<- rownames(temp_df)
temp_df$cluster<- cluster_look
# Checking if an FDR is equal to 0
if( any(temp_df$FDR == 0) ){
min_pvalue<- sort(unique(temp_df$FDR))[2] # take the second lowest pvalue
cat("pvalues equal to 0 have been changed to:", min_pvalue, "\n")
temp_df$FDR[temp_df$FDR == 0] <- min_pvalue # setting lower bound
}
temp_df$neglog_FDR<- -log10(temp_df$FDR)
temp_df<- temp_df[sort(rownames(temp_df)),]
data.frame(temp_df[,c("Gene", "cluster", "p.value", "FDR", "neglog_FDR")])
})
names(lis_markers)<- names(markers_sce)
return(lis_markers)
}
pheatmap.colorsymmetric <- function(x,lim=NULL,...)
{
require(pheatmap)
if(is.null(lim) ){
lim <- max(abs(x), na.rm=TRUE)
}
if( min(x, na.rm=TRUE) < 0 ){
lim_down<- -lim
col_palette<- colorRampPalette(c("blue","white","red"))(256)
}else{
lim_down<- 0
col_palette<- colorRampPalette(c("white","red"))(256)
}
pheatmap(
x, color = col_palette,
breaks=seq(lim_down,lim,length.out=255), ... )
}
# Reading SCE object
sce<- loadHDF5SummarizedExperiment(dir=dir_data_hdf5)
meta<- data.frame(colData(sce))
# Reading GEDI model
model<- readRDS(paste0(dir_data, "COVID19_gedi_model_cohort1_TF.rds"))
# reorder meta based on GEDI order
meta<- meta[model$aux$cellIDs,]
# Get activities per cell
ADB<- getADB.gedi(model)
# Get ZDB
ZDB<- getZDB.gedi(model)
# Get the gradient for all TFs
gradients <- getActivityGradients.gedi( model )
# Now, retrieve the differential gene expression per cell ( severe vs control)
t( model$aux$inputH)
(Intercept) group_per_samplemild group_per_samplesevere
C19-CB-0001 1 1 0
C19-CB-0003 1 1 0
C19-CB-0002 1 1 0
C19-CB-0005 1 1 0
C19-CB-0009 1 0 1
C19-CB-0012 1 0 1
C19-CB-0013 1 0 1
C19-CB-0011 1 0 1
C19-CB-0008 1 0 1
C19-CB-0020 1 0 1
C19-CB-0021 1 0 1
C19-CB-0016 1 0 1
C19-CB-0198 1 0 1
C19-CB-0204 1 1 0
C19-CB-0199 1 0 1
C19-CB-0214 1 1 0
C19-CB-0053 1 1 0
C19-CB-0052 1 1 0
P18F 1 0 0
P17H 1 0 0
P20H 1 0 0
P15F 1 0 0
P08H 1 0 0
P13H 1 0 0
P07H 1 0 0
P06F 1 0 0
P04H 1 0 0
C2P01H 1 0 0
P09H 1 0 0
P02H 1 0 0
C2P05F 1 0 0
C2P07H 1 0 0
C2P13F 1 0 0
C2P16H 1 0 0
C2P10H 1 0 0
C2P19H 1 0 0
C2P15H 1 0 0
one_k_v3 1 0 0
Five_k_v3 1 0 0
Ten_k_v3 1 0 0
DiffExp <- getDiffExp.gedi( model, c(0,0,1) )
meta$velocity_severe<- colSums(DiffExp^2)
Estimate Dot Product and Cosine Similarity
dotprod <- crossprod(DiffExp,gradients) # to get cosine similarity, first calculate dot product
cosineSim <- dotprod / sqrt(colSums(DiffExp^2)) # then, divide by the length of the expression vectors
cosineSim <- t( t(cosineSim) / sqrt(colSums(gradients^2)) ) # and also divide by the length of the
# The gradient vector of each TF is first normalized to have a length of one (by dividing by the Euclidean length of the vector)
gradients_norm<- scale(gradients, center=FALSE, scale=apply(gradients, 2, norm, type="2") )
dotprod_norm <- crossprod(DiffExp,gradients_norm) # to get cosine similarity, first calculate dot product
# Choosing TF
tf<- "SPI1"
set.seed(43)
## TF gradient
C<- model$aux$inputC # Get input C matrix
Cindex <- which(colnames(C)==tf)
vectorField <- svd.joint_vectorField_gradient.gedi(
model, start.cond = c(1,0,0), end.cond = c(1,0,1), Cindex, scale_cond_vector = 0.5 )
Gradient vectors will be scaled by a factor of 0.000394667923765283.
# Euclidean distance
umap_vectorField <- umap(
vectorField$v %*% diag(vectorField$d), min_dist=0.5,
metric="euclidean")
## Cell type embedding indices
ggp<- plot_embedding( umap_vectorField[vectorField$embedding_indices,], meta$id.celltype) +
theme_void() +
theme(legend.position ="none")
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
Warning: Please use tidy evaluation idioms with `aes()`.
Warning: See also `vignette("ggplot2-in-packages")` for more information.
ggp
## Saving the colors
g <- ggplot_build(ggp)
df<- g$data[[1]]
df<- unique(df[,c("group", "colour")])
df<- df[order(df$group),]
temp_vec<- levels(meta$id.celltype)
temp_vec<- temp_vec[temp_vec %in% unique(meta$id.celltype)]
df$celltype<- temp_vec
vec_colors<- df$colour
names(vec_colors)<- df$celltype
ggp<- plot_embedding( umap_vectorField[vectorField$embedding_indices,], meta$id.celltype) +
theme_void() +
theme(legend.position ="right")
legend <- cowplot::get_legend(ggp)
grid.newpage()
grid.draw(legend)
## covid vector field with the speed
ggp<- plot_vectorField( umap_vectorField[vectorField$vectorField_indices,], meta$velocity, minNum=15 ) +
theme_void() +
labs(title="Vector field of severe COVID-19") +
theme(legend.position ="right")
ggp
## TF gradient with TF activity
ggp<- plot_vectorField( umap_vectorField[vectorField$gradient_indices,], ADB[tf,], minNum=15 ) +
theme_void() +
labs(title=paste0("TF activity:", tf)) +
theme(legend.position ="right")
ggp
## UMAP plot with TF activity
ggp<- plot_embedding( umap_vectorField[vectorField$embedding_indices,], ADB[tf,]) +
theme_void() +
labs(title=paste0("TF activity:", tf)) +
theme(legend.position="right")
ggp
sessionInfo()
R version 4.0.0 (2020-04-24)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)
Matrix products: default
BLAS/LAPACK: /cvmfs/soft.computecanada.ca/easybuild/software/2020/Core/imkl/2020.1.217/compilers_and_libraries_2020.1.217/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
locale:
[1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8
[5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8
[7] LC_PAPER=en_CA.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] grid parallel stats4 stats graphics grDevices utils
[8] datasets methods base
other attached packages:
[1] GEDI_0.0.0.9000 gridExtra_2.3
[3] uwot_0.1.10 RColorBrewer_1.1-2
[5] pheatmap_1.0.12 HDF5Array_1.18.1
[7] rhdf5_2.34.0 DelayedArray_0.16.3
[9] Matrix_1.3-3 scater_1.18.6
[11] ggplot2_3.4.2 scran_1.18.6
[13] SingleCellExperiment_1.12.0 SummarizedExperiment_1.20.0
[15] Biobase_2.50.0 GenomicRanges_1.42.0
[17] GenomeInfoDb_1.26.7 IRanges_2.24.1
[19] S4Vectors_0.28.1 BiocGenerics_0.36.0
[21] MatrixGenerics_1.2.1 matrixStats_0.58.0
loaded via a namespace (and not attached):
[1] bitops_1.0-6 RcppAnnoy_0.0.18
[3] backports_1.2.1 tools_4.0.0
[5] bslib_0.2.4 utf8_1.2.1
[7] R6_2.5.0 irlba_2.3.3
[9] vipor_0.4.5 colorspace_2.0-0
[11] rhdf5filters_1.2.0 withr_2.5.0
[13] tidyselect_1.2.0 compiler_4.0.0
[15] cli_3.6.1 BiocNeighbors_1.8.2
[17] labeling_0.4.2 sass_0.3.1
[19] checkmate_2.1.0 scales_1.2.1
[21] metR_0.13.0 stringr_1.5.0
[23] digest_0.6.27 rmarkdown_2.7
[25] XVector_0.30.0 pkgconfig_2.0.3
[27] htmltools_0.5.1.1 sparseMatrixStats_1.2.1
[29] highr_0.8 fastmap_1.1.0
[31] limma_3.46.0 rlang_1.1.0
[33] DelayedMatrixStats_1.12.3 farver_2.1.0
[35] jquerylib_0.1.3 generics_0.1.3
[37] jsonlite_1.8.4 BiocParallel_1.24.1
[39] dplyr_1.1.1 RCurl_1.98-1.3
[41] magrittr_2.0.3 BiocSingular_1.6.0
[43] GenomeInfoDbData_1.2.4 scuttle_1.0.4
[45] Rcpp_1.0.8.3 ggbeeswarm_0.6.0
[47] munsell_0.5.0 Rhdf5lib_1.12.1
[49] fansi_0.4.2 viridis_0.5.1
[51] lifecycle_1.0.3 stringi_1.5.3
[53] yaml_2.2.1 edgeR_3.32.1
[55] zlibbioc_1.36.0 plyr_1.8.6
[57] dqrng_0.2.1 lattice_0.20-41
[59] cowplot_1.1.1 beachmat_2.6.4
[61] locfit_1.5-9.4 knitr_1.31
[63] pillar_1.8.1 igraph_1.3.4
[65] codetools_0.2-16 glue_1.6.2
[67] evaluate_0.14 data.table_1.14.0
[69] vctrs_0.6.1 gtable_0.3.0
[71] cachem_1.0.4 xfun_0.22
[73] rsvd_1.0.5 RcppEigen_0.3.3.9.1
[75] RSpectra_0.16-0 viridisLite_0.3.0
[77] tibble_3.2.1 memoise_2.0.0
[79] beeswarm_0.3.1 bluster_1.0.0
[81] statmod_1.4.35